******************************************************************************************
* ONLINE APPENDIX FIGURE A12, PANEL B
* 1) ESTIMATE THE EFFECT OF THE VENEZUELAN IMMIGRATION ON WAGES, EMPLOYMENT, 
* UNEMPLOYMENT, AND LABOR FORCE PARTICIPATION OF NATIVES INCLUDING LOG GDP AT THE STATE
* LEVEL AS ADDITIONAL CONTROL. 
*  
* LAST MODIFIED: JULY, 2025
******************************************************************************************

* Start from an empty session (data, matrices, stored results, programs).
clear all

* GRAPH SCHEMES
* The figures below use scheme(plottig). Uncomment and run once if the scheme
* packages are not installed on this machine.
*ssc install blindschemes, replace all 
*ssc install schemepack, replace /*based on https://github.com/asjadnaqvi/Stata-schemes*/

**# DIRECTORIES

* ROOT OF THE PAPER FOLDER (DROPBOX). Machine-specific: edit this line when
* running the do-file on another computer.
global fiscal "/Users/andres/Library/CloudStorage/Dropbox/2 Papers/2023/Paper_Fiscal_Costs"

* SUBDIRECTORIES, ALL DEFINED RELATIVE TO THE ROOT
*   data     : final datasets read by this do-file
*   programs : do-files for the paper's figures and tables
*   results  : output folder where the figure is written
global data     "${fiscal}/1 Data/12 Final Datasets"
global programs "${fiscal}/2 Programs/2 Figures and Tables paper"
global results  "${fiscal}/3 Results"

**# LOAD DATASET AND CREATE NEW VARIABLES

* ANALYSIS FILE WITH ALL THE VARIABLES USED BELOW
use "$data/LaborEffects.dta", clear

**# SAMPLE SELECTION

* METROPOLITAN AREAS ONLY
* Area codes 81 and above and area code 25 have no observations of interest;
* a missing area code compares as larger than any number, so the first
* statement already removes it and the last one is only a safeguard.
keep if area < 81
drop if area == 25
drop if missing(area)

* INDICATOR VARIABLES FOR EACH YEAR (year1, year2, ...) AND EACH MSA (area1, area2, ...)
* Created after the area restriction so that the area dummies are numbered
* over the retained areas only.
tabulate year, generate(year)
tabulate area, generate(area)

* WORKING-AGE POPULATION: AGES 15 TO 64 (missing ages are excluded as well)
keep if inrange(age, 15, 64)

* OPTIONAL RESTRICTION: NOT ENROLLED IN SCHOOL
*keep if att_school == 2 

* OPTIONAL RESTRICTION: AT MOST 100 WEEKLY HOURS OF WORK
*drop if weekly_hours>100

* ANALYSIS PERIOD ENDS IN 2018 (header of the original note: years 2013-2018)
keep if year <= 2018

* SHOW SAMPLING WEIGHTS WITHOUT DECIMALS OR SCIENTIFIC NOTATION
format fex12 %12.0f

* INTEGER VERSION OF THE WEIGHTS FOR COMMANDS THAT REQUIRE THEM
* (not referenced again in this do-file)
generate fex12_round = fex12
replace fex12_round = round(fex12_round)

* NATURAL LOG OF GDP, THE ADDITIONAL CONTROL OF THIS ROBUSTNESS EXERCISE
generate log_gdp = ln(gdp)

**# GLOBAL MACROS FOR VARIABLES
* controls : year and MSA dummies (first category of each omitted), sex,
*            a quadratic in age, education dummies, and log GDP
* x        : regressor of interest; its coefficient is what gets plotted
* z1, z2   : instruments used one at a time (labelled "enclave" and
*            "distance" in the figure)
* w        : sampling weight passed to the estimations as [pw = ...]
global controls "year2-year6 area2-area23 i.sex1 age c.age#c.age educ2-educ5 log_gdp"
global x        "mig_share2013"
global z1       "IV2005"
global z2       "IVdist"
global w        "fex12"

**# ESTIMATION RESULTS

**## 1) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' UNEMPLOYMENT
* Sample: non-immigrants (immig == 0) in the labor force with a non-missing area.
* Matrix A stores one row per estimator (OLS, IV1, IV2) and three columns:
* point estimate (b) and lower/upper bound of the 95% t-based confidence
* interval (ll, ul) for the coefficient on mig_share2013.

matrix A = J(3,3,.)
matrix rownames A = OLS IV1 IV2 
matrix colnames A = b ll ul 

* Pieces of the ivreg2 call that change across rows:
*   row 1 = OLS, row 2 = IV with the enclave instrument, row 3 = IV with the
*   distance instrument. First-stage output is requested for the IV rows only.
local rhs1 "$x"
local rhs2 "($x = $z1)"
local rhs3 "($x = $z2)"
local fs1  ""
local fs2  "first"
local fs3  "first"

forvalues r = 1/3 {
    * MSA dummies are partialled out; standard errors are clustered by area
    ivreg2 unemp `rhs`r'' $controls if labforce == 1 & area != . & immig == 0 [pw = $w], `fs`r'' cluster(area) partial(area2-area23) small 
    matrix A[`r',1] = _b[mig_share2013]
    matrix A[`r',2] = _b[mig_share2013] - invttail(e(df_r),0.025)*_se[mig_share2013]
    matrix A[`r',3] = _b[mig_share2013] + invttail(e(df_r),0.025)*_se[mig_share2013]
}


**## 2) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' EMPLOYMENT
* Sample: non-immigrants (immig == 0) flagged working_age == 1 with a
* non-missing area.
* Matrix B stores one row per estimator (OLS, IV1, IV2) and three columns:
* point estimate (b) and lower/upper bound of the 95% t-based confidence
* interval (ll, ul) for the coefficient on mig_share2013.

matrix B = J(3,3,.)
matrix rownames B = OLS IV1 IV2 
matrix colnames B = b ll ul 

* Pieces of the ivreg2 call that change across rows:
*   row 1 = OLS, row 2 = IV with the enclave instrument, row 3 = IV with the
*   distance instrument. First-stage output is requested for the IV rows only.
local rhs1 "$x"
local rhs2 "($x = $z1)"
local rhs3 "($x = $z2)"
local fs1  ""
local fs2  "first"
local fs3  "first"

forvalues r = 1/3 {
    * MSA dummies are partialled out; standard errors are clustered by area
    ivreg2 emp `rhs`r'' $controls if working_age == 1 & area != . & immig == 0 [pw = $w], `fs`r'' cluster(area) partial(area2-area23) small 
    matrix B[`r',1] = _b[mig_share2013]
    matrix B[`r',2] = _b[mig_share2013] - invttail(e(df_r),0.025)*_se[mig_share2013]
    matrix B[`r',3] = _b[mig_share2013] + invttail(e(df_r),0.025)*_se[mig_share2013]
}


**##  3) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' WAGES 
* NOTE: USUALLY THE SAMPLE IS RESTRICTED TO NATIVES IN MSAs WHO ARE 
* NOT ATTENDING SCHOOL AND NOT SELF-EMPLOYED. HOWEVER, MOST OF THE 
* PAPERS EXPLORING SIMILAR TOPICS FOR COLOMBIA DO NOT DO SO.  
* ADDING THESE SAMPLE RESTRICTIONS DOES NOT CHANGE THE OVERALL CONCLUSIONS OF THE DOCUMENT. 

**### SAMPLE CONSIDERATIONS WAGE ESTIMATIONS

* The wage sample is trimmed on a temporary copy of the data; the matching
* -restore- after the wage regressions brings back the full sample.
preserve

* KEEP IF NOT ENROLLED IN SCHOOL
*keep if att_school == 2 

* KEEP IF NOT SELF-EMPLOYED 
*keep if emp_type != 4

* KEEP ONLY SALARIED WORKERS
*keep if emp_type == 1

* KEEP THOSE THAT WORK LESS THAN 100 HOURS A WEEK  
*drop if weekly_hours>100

* HANDLING OUTLIERS
* Within each year, keep only wages between that year's 0.5th and 99.5th
* percentiles. Every drop is conditioned on the year whose percentiles were
* just computed, so one year's cutoffs never trim another year's observations
* and each year's percentiles are computed on untrimmed data.

* Observations without a wage cannot enter the wage analysis. They are dropped
* explicitly here because Stata treats a missing value as larger than any
* number, so an upper-tail comparison would otherwise remove them silently.
drop if missing(hourly_real_wages)

* Cutoffs are held in scalars (full double precision) rather than in macros.
tempname p_low p_high

levelsof year, local(years)
foreach j of local years {
    display "this is year `j'"
    centile hourly_real_wages if year == `j', centile(0.5 99.5)
    return list 
    scalar `p_low'  = r(c_1)
    scalar `p_high' = r(c_2)
    display `p_low'
    display `p_high'
    * trim both tails of the wage distribution of year `j' only
    drop if year == `j' & hourly_real_wages > `p_high'
    drop if year == `j' & hourly_real_wages < `p_low'
}

* Matrix C stores one row per estimator (OLS, IV1, IV2) and three columns:
* point estimate (b) and lower/upper bound of the 95% t-based confidence
* interval (ll, ul) for the coefficient on mig_share2013 in the log hourly
* real wage equation. Sample: employed non-immigrants with a non-missing area.
matrix C = J(3,3,.)
matrix rownames C = OLS IV1 IV2 
matrix colnames C = b ll ul 

* Pieces of the ivreg2 call that change across rows:
*   row 1 = OLS, row 2 = IV with the enclave instrument, row 3 = IV with the
*   distance instrument. First-stage output is requested for the IV rows only.
local rhs1 "$x"
local rhs2 "($x = $z1)"
local rhs3 "($x = $z2)"
local fs1  ""
local fs2  "first"
local fs3  "first"

forvalues r = 1/3 {
    * MSA dummies are partialled out; standard errors are clustered by area
    ivreg2 log_hourly_real_wages `rhs`r'' $controls if emp == 1 & immig == 0 & area != . [pw = $w], `fs`r'' cluster(area)  partial(area2-area23) small 
    matrix C[`r',1] = _b[mig_share2013]
    matrix C[`r',2] = _b[mig_share2013] - invttail(e(df_r),0.025)*_se[mig_share2013]
    matrix C[`r',3] = _b[mig_share2013] + invttail(e(df_r),0.025)*_se[mig_share2013]
}

* Bring back the sample as it was before the wage outliers were removed
restore


**## 4) EFFECT OF THE VENEZUELAN IMMIGRATION ON NATIVES' LABOR FORCE PARTICIPATION
* Sample: all non-immigrants (immig == 0) with a non-missing area.
* Matrix D stores one row per estimator (OLS, IV1, IV2) and three columns:
* point estimate (b) and lower/upper bound of the 95% t-based confidence
* interval (ll, ul) for the coefficient on mig_share2013.

matrix D = J(3,3,.)
matrix rownames D = OLS IV1 IV2 
matrix colnames D = b ll ul

* Pieces of the ivreg2 call that change across rows:
*   row 1 = OLS, row 2 = IV with the enclave instrument, row 3 = IV with the
*   distance instrument. First-stage output is requested for the IV rows only.
local rhs1 "$x"
local rhs2 "($x = $z1)"
local rhs3 "($x = $z2)"
local fs1  ""
local fs2  "first"
local fs3  "first"

forvalues r = 1/3 {
    * MSA dummies are partialled out; standard errors are clustered by area
    ivreg2 labforce `rhs`r'' $controls if immig == 0 & area != . [pw = $w], `fs`r'' cluster(area) partial(area2-area23) small 
    matrix D[`r',1] = _b[mig_share2013]
    matrix D[`r',2] = _b[mig_share2013] - invttail(e(df_r),0.025)*_se[mig_share2013]
    matrix D[`r',3] = _b[mig_share2013] + invttail(e(df_r),0.025)*_se[mig_share2013]
}


**# PLOTTING EFFECTS

**## WAGES
* Panel A: point estimates (column 1 of matrix C) with the confidence bounds
* stored in columns 2 and 3, drawn as capped spikes around a zero line.
* The graph is saved to a temporary file that the final -graph combine- reads.

* T-BASED CIs
* coefplot names the plotted coefficients after the matrix row names
* (OLS, IV1, IV2), so the axis labels are attached to those names; r1-r3 are
* kept as a fallback for a matrix without row names.
coefplot (matrix(C[,1]), ci((C[,2] C[,3])) ///
    ciopts(recast(rcap))), ///
    yline(0) vertical ///
    coeflabel(OLS r1 = "OLS" ///
              IV1 r2 = "IV: enclave" ///
              IV2 r3 = "IV: distance" , labsize(small)) ///
    ytitle("Effect on log hourly wages", size(medsmall))  ///
    graphregion(color(white)) scheme(plottig) ///
    title("A. Wages", size(medium))
tempfile results1
graph save "`results1'"


**## UNEMPLOYMENT
* Panel B: point estimates (column 1 of matrix A) with the confidence bounds
* stored in columns 2 and 3, drawn as capped spikes around a zero line.
* The graph is saved to a temporary file that the final -graph combine- reads.

* T-BASED CIs
* coefplot names the plotted coefficients after the matrix row names
* (OLS, IV1, IV2), so the axis labels are attached to those names; r1-r3 are
* kept as a fallback for a matrix without row names.
coefplot (matrix(A[,1]), ci((A[,2] A[,3])) ///
    ciopts(recast(rcap))), ///
    yline(0) vertical ///
    coeflabel(OLS r1 = "OLS" ///
              IV1 r2 = "IV: enclave" ///
              IV2 r3 = "IV: distance" , labsize(small)) ///
    ytitle("Effect on P(unemployment = 1)", size(medsmall))  ///
    graphregion(color(white)) scheme(plottig) ///
    title("B. Unemployment", size(medium))
tempfile results2
graph save "`results2'"

**## EMPLOYMENT
* Panel C: point estimates (column 1 of matrix B) with the confidence bounds
* stored in columns 2 and 3, drawn as capped spikes around a zero line.
* The graph is saved to a temporary file that the final -graph combine- reads.

* T-BASED CIs
* coefplot names the plotted coefficients after the matrix row names
* (OLS, IV1, IV2), so the axis labels are attached to those names; r1-r3 are
* kept as a fallback for a matrix without row names.
coefplot (matrix(B[,1]), ci((B[,2] B[,3])) ///
    ciopts(recast(rcap))), ///
    yline(0) vertical ///
    coeflabel(OLS r1 = "OLS" ///
              IV1 r2 = "IV: enclave" ///
              IV2 r3 = "IV: distance" , labsize(small)) ///
    ytitle("Effect on P(employment = 1)", size(medsmall))  ///
    graphregion(color(white)) scheme(plottig) ///
    title("C. Employment", size(medium))
tempfile results3
graph save "`results3'"

		 
**## LABOR FORCE PARTICIPATION
* Panel D: point estimates (column 1 of matrix D) with the confidence bounds
* stored in columns 2 and 3, drawn as capped spikes around a zero line.
* The graph is saved to a temporary file that the final -graph combine- reads.

* T-BASED CIs
* coefplot names the plotted coefficients after the matrix row names
* (OLS, IV1, IV2), so the axis labels are attached to those names; r1-r3 are
* kept as a fallback for a matrix without row names.
coefplot (matrix(D[,1]), ci((D[,2] D[,3])) ///
    ciopts(recast(rcap))), ///
    yline(0) vertical ///
    coeflabel(OLS r1 = "OLS" ///
              IV1 r2 = "IV: enclave" ///
              IV2 r3 = "IV: distance" , labsize(small)) ///
    ytitle("Effect on P(labor force = 1)", size(medsmall))  ///
    graphregion(color(white)) scheme(plottig) ///
    title("D. Labor force", size(medium))
tempfile results4
graph save "`results4'"

		 
**# ONLINE APPENDIX FIGURE A12, PANEL B: LABOR MARKET EFFECTS (T-BASED CIs)
* Arrange the four saved panels (wages, unemployment, employment, labor force)
* in a 2x2 grid with a common x axis, then store the combined figure both as a
* Stata graph (.gph) and as an EPS file in the results folder.
graph combine "`results1'" "`results2'" "`results3'" "`results4'", ///
    cols(2) rows(2) xcommon iscale(0.72) ///
    graphregion(color(white)) scheme(plottig)
quietly graph save "$results/1 Figures Paper/FigA12_B.gph", replace 
quietly graph export "$results/1 Figures Paper/FigA12_B.eps", as(eps) replace



